/*******************************************************************************

[Last updated: June 4th, 2024]

This script cleans and organizes Qualtrics data for the Sit-D Assessment Analysis

The script is organized as follows:

	Part A: Coding the Pre-Virtra Data
	Part B: Coding the Rubrics Data
	Part C: Coding the Post-Virtra Data
	
	

*******************************************************************************/

clear all

	* --------------------------------------------------------------------------
	* Path and run-time settings.
	* FilePath11, FilePath12 and FilePath13 are read here but not set in this
	* script; they are expected to be defined before it runs.
	* --------------------------------------------------------------------------

	* Raw survey data
	gl raw_data ${FilePath11}
	gl pre_v	${raw_data}\Sit-D Endline Assessment - Pre-VirTra_July 12, 2021_18.46.sav
	gl post_v	${raw_data}\Sit-D Endline Assessment - Post-VirTra_July 12, 2021_18.48.sav
	gl rubrics	${raw_data}\Sit-D Endline Assessment - VirTra Rubric_July 12, 2021_18.48.sav
	
	* Qualitatively coded duplicated records
	gl e_data 	${FilePath12}
	gl dup_pre	${e_data}\SitD_End_Duplicates_pre_main.dta
	gl dup_post	${e_data}\SitD_End_Duplicates_post.dta
	gl dup_rub	${e_data}\SitD_End_Duplicates_rubric.dta
	
	* Output (file stems; the version name is appended when saving)
	gl o_data 	${FilePath13}
	gl out_pre	${o_data}\assessment_pre
	gl out_pos	${o_data}\assessment_pos
	gl out_rub	${o_data}\assessment_rub

	* Versions
	gl version online
	
		/* 3 options: 
		base: 	no incomplete, no duplicate, excluding online responses
		online: no incomplete, no duplicate, including online responses 
		full: 	including incomplete, duplicate, and online responses 
		*/

	* Fail fast on an unrecognized version: the sample filters further down
	* are guarded by inlist("$version", ...) and would otherwise be skipped
	* silently, producing an unfiltered file under a misleading name.
	if !inlist("$version", "base", "online", "full") {
		display as error "global version must be one of: base, online, full (got: $version)"
		exit 198
	}

		
		
		
// PROGRAM DEFINITION //////////////////////////////////////////////////////////

* Small program to import and sub-set Qualtrics data and to flag online responses
* (de-duplication is done by the caller, after this program has run)

program sample
	* Purpose: load one Qualtrics SPSS export, keep the valid survey IDs, and
	*          add the date / online-response indicators used downstream.
	* Args:    data - path of the .sav file to import (first positional argument)
	* Creates: survey_id, date, regular_date, online
	* NOTE(review): the name matches Stata's official -sample- command; this
	* definition presumably takes precedence for the session - confirm.
	args data

	* SPSS import keeps labels and values; variable names are lower-cased.
	* The raw number of records is displayed for reference.
	import spss using "`data'", case(lower) clear
	count

	* Copy of the survey ID entered in q1
	generate survey_id = q1

	* Keep IDs 2000-3995 only, and exclude ID 2222
	keep if inrange(q1, 2000, 3995) & q1 != 2222

	* Start date as a daily date, and a flag for the regular assessment window
	* (16 March 2021 through 24 April 2021, inclusive)
	generate date = dofc(startdate)
	generate regular_date = inrange(date, td(16mar2021), td(24apr2021))

	* Online assessment: finished, but started outside the regular window
	generate online = (finished == 1 & regular_date == 0)

end









// CLEANING QUALTRICS RESPONSES : PART A ///////////////////////////////////////
// Section 5.1 -----------------------------------------------------------------

sample "$pre_v"

		* Sample restrictions for the "base" and "online" versions
		* ("full" skips this block and keeps every record)
		if inlist("$version", "base", "online") {
		    
		* For records that were vetted: attach the flags from the duplicates
		* file. The path is quoted so that folders containing spaces work.
		* def_drop and unmatched are not created in this script; presumably
		* they come from the duplicates file - confirm.
		* NOTE(review): keep(1 2 3) also retains rows found only in the
		* duplicates file; confirm that those are removed by the drop below.
		merge m:1 responseid using "${dup_pre}", keep (1 2 3) nogen
		drop if (def_drop == 1 | unmatched == 1)
	
		* For incomplete survey: temporarily drop
		drop if progress < 100
		
		* For the two additional online survey - Decision made
		* (ID 3743: drop the second record by start time; ID 2415: drop all)
		sort q1 startdate
		by q1: drop if (_n == 2) & inlist(q1, 3743)
		drop if inlist(q1, 2415)
		}
		
		* "base" additionally excludes the online responses
		if inlist("$version", "base"){
		drop if (online == 1)
		}

	* Check the sample size
	tab online
	
* (5.1.1) ----------------------------------------------------------------------

	* Q3-Q6: 1 if the answer equals the keyed option, 0 otherwise;
	* left missing when the question was not answered
	local items 3 4 5 6
	local keys  1 2 1 2
	forvalues i = 1/4 {
		local q : word `i' of `items'
		local k : word `i' of `keys'
		generate q`q'_score = (q`q' == `k') if !missing(q`q')
	}

	* Q8: fraction of the 8 sub-items scored as correct.
	* Sub-items 2, 5 and 8 count as correct when left blank;
	* every other sub-item counts as correct when it equals 1.
	forvalues s = 1/8 {
		if inlist(`s', 2, 5, 8) {
			generate q8_sub_score_`s' = (q8_`s' == .)
		}
		else {
			generate q8_sub_score_`s' = (q8_`s' == 1)
		}
	}
	egen q8_score = rowmean(q8_sub_score*)

	* True missing for Q8: with all 8 sub-items blank the rule above would
	* award 3/8 for selecting nothing, so the score is set to missing instead
	local allblank 1
	forvalues s = 1/8 {
		local allblank `allblank' & q8_`s' == .
	}
	replace q8_score = . if `allblank'
					  
* (5.1.2) ----------------------------------------------------------------------

* (5.1.2.P.1) and (5.1.2.S.1)	Reverse coding of q17_1, q17_2 and q17_3
*				(a value k becomes 7 - k)
	forvalues i = 1/3 {
		generate q17_`i'_score = (6 - q17_`i') + 1
	}

* (5.1.2.S.2)	q15_1 is kept as is; q15_2 and q15_3 are reverse coded
	generate q15_1_score = q15_1
	forvalues i = 2/3 {
		generate q15_`i'_score = (6 - q15_`i') + 1
	}

* (5.1.2.S.3)	Normal coding: scores are plain copies of q13_1 - q13_5
	forvalues i = 1/5 {
		generate q13_`i'_score = q13_`i'
	}

* (5.1.3.p.1)	Scores are plain copies of the listed questions
	foreach q of numlist 20 71 75 77 80 {
		generate q`q'_score = q`q'
	}

* (5.1.3.p.2) Quantitatively coded - Skipped

* (5.1.3.p.3) and (5.1.3.p.4) Timing, log and level data, outliers coded as missing
*	p.3 covers questions 25 27 29; p.4 covers questions 26 28 30 33 36.
*	Both groups get the same treatment, so they share one loop, processed
*	in the original order.
	foreach q of numlist 25 27 29 26 28 30 33 36 {
		* Raw response times of 1,000,000 or more are treated as outliers
		recode responsetime_qid`q' (1000000/max = .), generate(timing_`q'_level)
		* Level: raw value divided by 1,000 (presumably ms to s - confirm)
		replace timing_`q'_level = timing_`q'_level / 1000
		* Natural log of the level
		generate timing_`q'_log = ln(timing_`q'_level)
	}

* (5.1.3.p.5) Quantitatively coded - Skipped
	
* (5.1.3.s.1) Interpretation choice
	* The raw items do not share one direction: per the original author,
	* "crime" is option 2 in some questions and option 1 in others.
	* All five are re-coded to a common 0/1 score:
	*   q26, q30, q36: 1 -> 0, 2 -> 1
	*   q28, q33:      1 -> 1, 2 -> 0
	foreach q of numlist 26 28 30 33 36 {
		if inlist(`q', 28, 33) {
			recode q`q' (1 = 1) (2 = 0), generate(q`q'_score)
		}
		else {
			recode q`q' (1 = 0) (2 = 1), generate(q`q'_score)
		}
	}
	
* (5.1.4.p.1) - Assailant Level & Force Level

	* Recoding for consistency: every item below gets a q*_level variable

	* 4-scale questions - reversed (a value k becomes 5 - k)
	foreach item in 48 53_1 106_1 121 {
		generate q`item'_level = (4 - q`item') + 1
	}

	* 9-scale questions, collapsed to levels 7 and 8

	* q50: correct scaling (9 is deadly, 8 is high) -> 7 and 8 become 7, 9 becomes 8
	recode q50 (7 8 = 7) (9 = 8), generate(q50_level)

	* q55, q108, q123: scaling off (9 is high, 8 is deadly) -> 7 and 9 become 7, 8 stays 8
	foreach item in 55 108 123 {
		recode q`item' (7 9 = 7) (8 = 8), generate(q`item'_level)
	}

	* 9-scale questions, no recode: level is a plain copy
	foreach item in 51 56 109 124 {
		generate q`item'_level = q`item'
	}

	* (a) assailant level: score is 1 when the level equals 7 (missing if the
	*     level is missing); distance is the level minus 7
	foreach item in 50 108 {
		generate q`item'_score = (q`item'_level == 7) if !missing(q`item'_level)
		generate q`item'_dis   = q`item'_level - 7
	}

	* (b) force level: options 1-8 score 1, option 9 scores 0
	foreach item in 51 109 {
		recode q`item' (1/8 = 1) (9 = 0), generate(q`item'_score)
	}

* (5.1.4.p.2) - Qualitatively coded - Skipped

* (5.1.4.p.3) Differences in level between paired questions
	generate q53121_score = q121_level - q53_1_level
	generate q55123_score = q123_level - q55_level
	generate q56124_score = q124_level - q56_level
	
* (5.1.4.s.1) - coded above

	
* (5.1.4.s.2) Assailant level // Already coded in (5.1.4.s.1)
	
* Save the pre-VirTra data as <out_pre>_<version>
* (path quoted so that output folders containing spaces work)

	save "${out_pre}_${version}", replace

	
	
	
	
	
	
// CLEANING QUALTRICS RESPONSES : PART B ///////////////////////////////////////
// Section 5.2 -----------------------------------------------------------------

sample "$rubrics"

		* Sample restrictions for the "base" and "online" versions
		* ("full" skips this block and keeps every record)
		if inlist("$version", "base", "online") {
		    
		* For records that were vetted: attach the flags from the duplicates
		* file. The path is quoted so that folders containing spaces work.
		* def_drop and unmatched are not created in this script; presumably
		* they come from the duplicates file - confirm.
		* NOTE(review): keep(1 2 3) also retains rows found only in the
		* duplicates file; confirm that those are removed by the drop below.
		merge m:1 responseid using "${dup_rub}", keep (1 2 3) nogen
		drop if (def_drop == 1 | unmatched == 1)
	
		* For incomplete survey: temporarily drop
		drop if progress < 100
		
		* For the two additional online survey - Decision made, confirmed by Oeindrila
		* (ID 3743: drop the second record by start time; ID 2415: drop all)
		sort q1 startdate
		by q1: drop if (_n == 2) & inlist(q1, 3743)
		drop if inlist(q1, 2415)
		}
		
		* "base" additionally excludes the online responses
		if inlist("$version", "base"){
		drop if (online == 1)
		}
		
* (5.2.1) ----------------------------------------------------------------------

* Quick check on the frequencies of some questions.
* For each question, a row vector is built that starts with a missing
* placeholder and then holds the number of records with option o == 1,
* for o = 1..5. The four vectors are stacked into matrix a for display.
	foreach q of numlist 25 40 62 63 {
		matrix a_`q' = .
		forvalues o = 1/5 {
			quietly count if q`q'_`o' == 1
			matrix a_`q' = a_`q', r(N)
		}
		matrix list a_`q'
	}
	matrix a = a_25 \ a_40 \ a_62 \ a_63
	matrix rownames a = 25 40 62 63
	* NOTE(review): a has 6 columns (placeholder + 5 options) but only 5
	* names are given here - confirm the labels line up with the options.
	matrix colnames a = . Spray TASER NoWeapon NotObserved
	matrix list a

	* Single variable holding the selected q40 option; when several options
	* equal 1, the highest-numbered one is kept
	generate q40_combined = .
	forvalues s = 1/5 {
		replace q40_combined = `s' if q40_`s' == 1
	}

		*replace q41 = "." if q41 == ""
		tabulate q41 q40_combined if scenario2 == "Taggers"
	
* (5.2.1.p.1)
	* Code into 1 and 0, with 1 for "Yes", 0 for "No", and missing for NA
	foreach q of numlist 19 34 45 20 35 46 {
		recode q`q' (1 = 1) (2 = 0) (3 = .), generate(q`q'_score)
	}

* (5.2.1.p.2) - A and B are not combinable

	* Text answers are converted to numbers (non-numeric text becomes missing)
	* and then collapsed to binary: 0 stays 0, values of 1 or more become 1
	foreach q of numlist 54 41 {
		destring q`q', generate(q`q'_score) force
		recode q`q'_score (0 = 0) (1/max = 1)
	}

	* Code zero for some missing with response in another item
	* (option 4 of q25 / q40 equal to 1)
	replace q54_score = 0 if q25_4 == 1 & missing(q54_score)
	replace q41_score = 0 if q40_4 == 1 & missing(q41_score)

	* Separate copy of the q41 score for the "Taggers" scenario, taken
	* before q41_score is restricted to "Man and Wife"
	generate q41_score_taggers = q41_score

	* For q41: only consider "Man and Wife"
	replace q41_score = . if scenario2 != "Man and Wife"

	* For q41 on "Taggers" - the order of these replacements matters
	local taggers scenario2 == "Taggers"
	replace q41_score_taggers = . if scenario2 != "Taggers"
	replace q41_score_taggers = 0 if `taggers' & q41_score_taggers == 1 & q40_4 == 1
	replace q41_score_taggers = 1 if q41_score_taggers == . & `taggers' & q40_1 == 1
	replace q41_score_taggers = 1 if `taggers' & q40_3 == 1

* (5.2.1.p.3) Recode to binary
	destring q55, generate(q55_score) force
	recode q55_score (0 = 0) (1/max = 1)

	destring _v1, generate(q52_score) force
	recode q52_score (0 = 0) (1/max = 1)

	* Free-text answers that destring cannot read as a number, coded as 1
	replace q52_score = 1 if inlist(_v1, "2 gun shots also taser discharge", ///
	"2 taser 1 glock", "9 handgun 1taser", "Approximately 8 times")

	* Code zero for some missing with response in another item
	* (option 4 of q62 / q63 equal to 1)
	replace q55_score = 0 if q62_4 == 1 & missing(q55_score)
	replace q52_score = 0 if q63_4 == 1 & missing(q52_score)

	* q52 is only scored for the "Street Stop 2" scenario
	replace q52_score = . if scenario3 != "Street Stop 2"

* (5.2.2.s.1) - Did the officer freeze: questions 18 33 44
* (5.2.2.s.2) questions 21 36 47 22 37 48
* (5.2.2.s.2) questions 23 38 49
*	NOTE(review): the label (5.2.2.s.2) appears twice in the original;
*	the last group may belong to a different sub-section - confirm.
*
*	All three groups use the same coding (1 stays 1, 2 becomes 0,
*	3 becomes missing), so they share one loop in the original order.
	foreach q of numlist 18 33 44 21 36 47 22 37 48 23 38 49 {
		recode q`q' (1 = 1) (2 = 0) (3 = .), generate(q`q'_score)
	}

* Save the rubric data as <out_rub>_<version>
* (path quoted so that output folders containing spaces work)
	save "${out_rub}_${version}", replace


	
	
	
	
	
// CLEANING QUALTRICS RESPONSES : PART C ///////////////////////////////////////
// Section 5.3 -----------------------------------------------------------------

sample "$post_v"

		* Sample restrictions for the "base" and "online" versions
		* ("full" skips this block and keeps every record)
		if inlist("$version", "base", "online") {
		    
		* For records that were vetted: attach the flags from the duplicates
		* file. The path is quoted so that folders containing spaces work.
		* def_drop and unmatched are not created in this script; presumably
		* they come from the duplicates file - confirm.
		* NOTE(review): keep(1 2 3) also retains rows found only in the
		* duplicates file; confirm that those are removed by the drop below.
		merge m:1 responseid using "${dup_post}", keep (1 2 3) nogen
		drop if (def_drop == 1 | unmatched == 1)
	
		* For incomplete survey: temporarily drop
		drop if progress < 100
		
		* For the two additional online survey - Decision made, confirmed by Oeindrila
		* (ID 3743: drop the second record by start time; ID 2415: drop all)
		sort q1 startdate
		by q1: drop if (_n == 2) & inlist(q1, 3743)
		drop if inlist(q1, 2415)
		}
		
		* "base" additionally excludes the online responses
		if inlist("$version", "base"){
		drop if (online == 1)
		}
		
* Save the post-VirTra data as <out_pos>_<version>
* (path quoted so that output folders containing spaces work)
	save "${out_pos}_${version}", replace
	
